Index(['Issue_id', 'Priority', 'Component', 'Duplicated_issue', 'Title',
'Description', 'Status', 'Resolution', 'Version', 'Created_time',
'Resolved_time'],
dtype='object')
Issue_id ... Description
35611 190066 ... Build ID: M20070212-1330; ; Steps To Reproduce...
5343 14151 ... The code formatter does not respect the maximu...
[2 rows x 5 columns]
import html
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
def clean_up(text):
    """Normalize a raw bug-report description for classification.

    Strips HTML entities and tags, markdown links, bracketed text and
    stray runs of special characters, then tokenizes, lower-cases and
    removes English stop words.

    Args:
        text: raw description string.

    Returns:
        Cleaned, lower-cased, space-joined token string.
    """
    # convert html escapes like &amp; to characters.
    text = html.unescape(text)
    # tags like <tab>
    text = re.sub(r'<[^<>]*>', ' ', text)
    # markdown URLs like [Some text](https://....)
    text = re.sub(r'\[([^\[\]]*)\]\([^\(\)]*\)', r'\1', text)
    # text or code in brackets like [0]
    text = re.sub(r'\[[^\[\]]*\]', ' ', text)
    # standalone sequences of specials, matches &# but not #cool
    text = re.sub(r'(?:^|\s)[&#<>{}\[\]+|\\:-]{1,}(?:\s|$)', ' ', text)
    # standalone sequences of hyphens like --- or ==
    text = re.sub(r'(?:^|\s)[\-=\+]{2,}(?:\s|$)', ' ', text)
    # sequences of white spaces
    text = re.sub(r'\s+', ' ', text)
    # lower case and stop word removal - optional but generally useful
    # for classification
    tokens = word_tokenize(text)
    stops = set(stopwords.words('english'))
    # BUG FIX: compare the *lower-cased* token against the stop list.
    # stopwords.words('english') is all lower case, so the original
    # comparison let capitalized stop words like "The" slip through.
    tokens = [token.lower() for token in tokens
              if token.isalpha() and token.lower() not in stops]
    text = ' '.join(tokens)
    return text.strip()
# Clean every report description in place, then drop reports whose
# cleaned text is too short (<= 50 characters) to carry useful signal.
df['text'] = df['text'].apply(clean_up)
df = df[df['text'].str.len() > 50]
df.sample(2) Priority text
9297 P3 remove unused import quickfix sometimes doesnt...
3612 P3 browser perspective drag drop disabled drag dr...
from sklearn.model_selection import train_test_split

# Stratified 80/20 split so every priority class keeps its proportion
# in both the training and test sets.
X_train, X_test, Y_train, Y_test = train_test_split(df['text'],      # X values
                                                    df['Priority'],  # Y values
                                                    test_size=0.2,   # test size
                                                    random_state=42, # random shuffle
                                                    stratify=df['Priority'])
print('Size of Training Data ', X_train.shape[0])
print('Size of Test Data ', X_test.shape[0])
# import the function
from sklearn.linear_model import LogisticRegression
# set up blank model
# high iteration cap so the solver can converge on high-dimensional features
logreg = LogisticRegression(max_iter = 10000)
# fit the data
# NOTE(review): tv_train is not defined in this excerpt — presumably the
# vectorized (e.g. TF-IDF) training matrix built in an earlier cell; confirm.
logreg.fit(tv_train, Y_train)
# predict new data LogisticRegression(max_iter=10000)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=10000)
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MultinomialNB()
LinearSVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearSVC()
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from matplotlib import pyplot as plt

# Confusion matrix of the logistic-regression predictions, rows/columns
# ordered by the model's class labels.
# NOTE(review): y_log is produced outside this excerpt (presumably
# logreg.predict on the test features) — confirm against the earlier cell.
cm = confusion_matrix(Y_test, y_log, labels = logreg.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix = cm,
                              display_labels = logreg.classes_)
disp.plot()
precision recall f1-score support
P1 0.38 0.01 0.03 220
P2 0.43 0.00 0.01 602
P3 0.88 1.00 0.94 7836
P4 0.53 0.07 0.12 227
P5 0.00 0.00 0.00 50
accuracy 0.88 8935
macro avg 0.44 0.22 0.22 8935
weighted avg 0.82 0.88 0.83 8935
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
precision recall f1-score support
P1 0.00 0.00 0.00 220
P2 0.00 0.00 0.00 602
P3 0.88 1.00 0.93 7836
P4 0.00 0.00 0.00 227
P5 0.00 0.00 0.00 50
accuracy 0.88 8935
macro avg 0.18 0.20 0.19 8935
weighted avg 0.77 0.88 0.82 8935
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
# Filter bug reports with priority P3 and sample 4000 rows from it.
# random_state added for reproducibility, matching the seed (42) used by
# train_test_split elsewhere in this file.
df_sampleP3 = df[df['Priority'] == 'P3'].sample(n=4000, random_state=42)
# Create a separate DataFrame containing all other bug reports
df_sampleRest = df[df['Priority'] != 'P3']
# Concatenate the two DataFrames to create the new balanced dataset
df_balanced = pd.concat([df_sampleRest, df_sampleP3])
# quick example
# df_balanced = df.groupby(df.Priority).sample(n=252)
# Check the status of the class imbalance
df_balanced['Priority'].value_counts()
P3 4000
P2 3007
P4 1137
P1 1100
P5 251
Name: count, dtype: int64
from sklearn.model_selection import train_test_split

# Stratified 80/20 split of the balanced dataset; same seed as before so
# results are comparable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(df_balanced['text'],     # X values
                                                    df_balanced['Priority'], # Y values
                                                    test_size=0.2,   # test size
                                                    random_state=42, # random shuffle
                                                    stratify=df_balanced['Priority'])
print('Size of Training Data ', X_train.shape[0])
print('Size of Test Data ', X_test.shape[0])
LinearSVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearSVC()
precision recall f1-score support
P1 0.42 0.27 0.33 220
P2 0.42 0.43 0.42 602
P3 0.53 0.65 0.59 800
P4 0.49 0.34 0.41 227
P5 0.00 0.00 0.00 50
accuracy 0.48 1899
macro avg 0.37 0.34 0.35 1899
weighted avg 0.46 0.48 0.47 1899
# our flattening function from last time
import numpy as np


def document_vectorizer(corpus, model, num_features):
    """Average the word2vec vectors of each tokenized document.

    Args:
        corpus: iterable of tokenized documents (sequences of tokens).
        model: trained gensim word2vec model (reads model.wv).
        num_features: dimensionality of the word vectors.

    Returns:
        numpy array of shape (len(corpus), num_features); a document
        with no in-vocabulary words maps to the zero vector.
    """
    vocabulary = set(model.wv.index_to_key)

    def average_word_vectors(words, model, vocabulary, num_features):
        # Sum vectors of in-vocabulary words, then divide by their count.
        feature_vector = np.zeros((num_features,), dtype="float64")
        nwords = 0.
        for word in words:
            if word in vocabulary:
                nwords = nwords + 1.
                feature_vector = np.add(feature_vector, model.wv[word])
        if nwords:
            feature_vector = np.divide(feature_vector, nwords)
        return feature_vector

    features = [average_word_vectors(tokenized_sentence, model, vocabulary, num_features)
                for tokenized_sentence in corpus]
    return np.array(features)


from gensim.models import Word2Vec
# train the model on the data
# NOTE(review): X_train holds whitespace-joined strings produced by
# clean_up, while Word2Vec expects an iterable of token lists — passing
# raw strings makes gensim iterate characters. Confirm the input was
# re-tokenized upstream.
our_model = Word2Vec(X_train,
                     vector_size = 500, #dimensions
                     window = 5, #window size
                     sg = 0, #cbow
                     min_count = 1, # keep every token, even singletons
                     workers = 4)
# generate averaged word vector features from word2vec model
avg_wv_train_features = document_vectorizer(corpus = X_train,
                                            model = our_model,
                                            num_features = 500)
# generate averaged word vector features from word2vec model
avg_wv_test_features = document_vectorizer(corpus = X_test,
                                           model = our_model,
                                           num_features = 500)
# sanity check — expected shape (n_train_docs, 500)
avg_wv_train_features.shape
(1899, 500)
LinearSVC()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearSVC()
precision recall f1-score support
P1 0.70 0.03 0.06 220
P2 0.29 0.10 0.15 602
P3 0.43 0.90 0.58 800
P4 0.00 0.00 0.00 227
P5 0.00 0.00 0.00 50
accuracy 0.42 1899
macro avg 0.28 0.21 0.16 1899
weighted avg 0.36 0.42 0.30 1899
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Dense, Input, GlobalMaxPooling1D
from keras.layers import Conv1D, MaxPooling1D, Embedding, LSTM
from keras.models import Model, Sequential
from keras.initializers import Constant

# Hyperparameters for the neural models.
MAX_SEQUENCE_LENGTH = 1000   # pad/truncate every document to this length
MAX_NUM_WORDS = 20000        # vocabulary cap for the tokenizer
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

# Vectorize these text samples into a 2D integer tensor using Keras Tokenizer.
# Tokenizer is fit on training data only, and that is used to tokenize both
# train and test data.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(X_train)
train_sequences = tokenizer.texts_to_sequences(X_train) #Converting text to a vector of word indexes
test_sequences = tokenizer.texts_to_sequences(X_test)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
#Converting this to sequences to be fed into neural network. Max seq. len is
#1000 as set earlier. Initial padding of 0s, until vector is of
#size MAX_SEQUENCE_LENGTH
trainvalid_data = pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)
test_data = pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)
# One-hot (dummy) encode the priority labels for the softmax output layer.
trainvalid_labels = pd.get_dummies(Y_train)
test_labels = pd.get_dummies(Y_test)
# Hold out the last VALIDATION_SPLIT fraction of the training rows as a
# validation set. NOTE(review): this is a tail slice, not a fresh shuffle —
# it relies on train_test_split having already shuffled the rows.
num_validation_samples = int(VALIDATION_SPLIT * trainvalid_data.shape[0])
x_train = trainvalid_data[:-num_validation_samples]
y_train = trainvalid_labels[:-num_validation_samples]
x_val = trainvalid_data[-num_validation_samples:]
y_val = trainvalid_labels[-num_validation_samples:]
# This is the data we will use for CNN and RNN training.
# With a Sequential model in keras we can stack hidden layers that run
# in order. (Extraction fix: the Sequential() statement below had been
# swallowed into this comment line.)
cnnmodel = Sequential() # type of model
cnnmodel.add(Embedding(MAX_NUM_WORDS, 128))
cnnmodel.add(Conv1D(128, 5, activation='relu'))
cnnmodel.add(MaxPooling1D(5))
cnnmodel.add(Conv1D(128, 5, activation='relu'))
cnnmodel.add(MaxPooling1D(5))
cnnmodel.add(Conv1D(128, 5, activation='relu'))
cnnmodel.add(GlobalMaxPooling1D())
cnnmodel.add(Dense(128, activation='relu'))
# One output unit per priority class; softmax pairs with
# categorical_crossentropy below.
cnnmodel.add(Dense(y_train.shape[1], activation='softmax'))
cnnmodel.compile(loss='categorical_crossentropy',
                 optimizer = 'rmsprop',
                 metrics = ['acc']) # end with how to get to the output
1/48 [..............................] - ETA: 1:02 - loss: 1.5930 - acc: 0.4922
2/48 [>.............................] - ETA: 23s - loss: 1.5629 - acc: 0.4688
3/48 [>.............................] - ETA: 23s - loss: 1.5185 - acc: 0.4271
4/48 [=>............................] - ETA: 23s - loss: 1.4958 - acc: 0.4062
5/48 [==>...........................] - ETA: 23s - loss: 1.4850 - acc: 0.3953
6/48 [==>...........................] - ETA: 24s - loss: 1.4593 - acc: 0.3945
7/48 [===>..........................] - ETA: 24s - loss: 1.4328 - acc: 0.3984
8/48 [====>.........................] - ETA: 24s - loss: 1.4296 - acc: 0.3945
9/48 [====>.........................] - ETA: 23s - loss: 1.4201 - acc: 0.3854
10/48 [=====>........................] - ETA: 23s - loss: 1.4034 - acc: 0.3930
11/48 [=====>........................] - ETA: 22s - loss: 1.4004 - acc: 0.3963
12/48 [======>.......................] - ETA: 22s - loss: 1.3953 - acc: 0.4023
13/48 [=======>......................] - ETA: 21s - loss: 1.3919 - acc: 0.4044
14/48 [=======>......................] - ETA: 21s - loss: 1.3909 - acc: 0.4090
15/48 [========>.....................] - ETA: 20s - loss: 1.3891 - acc: 0.4115
16/48 [=========>....................] - ETA: 20s - loss: 1.3896 - acc: 0.4121
17/48 [=========>....................] - ETA: 19s - loss: 1.3880 - acc: 0.4122
18/48 [==========>...................] - ETA: 19s - loss: 1.3825 - acc: 0.4162
19/48 [==========>...................] - ETA: 18s - loss: 1.3860 - acc: 0.4153
20/48 [===========>..................] - ETA: 18s - loss: 1.3864 - acc: 0.4129
21/48 [============>.................] - ETA: 17s - loss: 1.3861 - acc: 0.4089
22/48 [============>.................] - ETA: 16s - loss: 1.3824 - acc: 0.4123
23/48 [=============>................] - ETA: 16s - loss: 1.3815 - acc: 0.4107
24/48 [==============>...............] - ETA: 15s - loss: 1.3785 - acc: 0.4134
25/48 [==============>...............] - ETA: 14s - loss: 1.3782 - acc: 0.4141
26/48 [===============>..............] - ETA: 14s - loss: 1.3750 - acc: 0.4180
27/48 [===============>..............] - ETA: 13s - loss: 1.3768 - acc: 0.4167
28/48 [================>.............] - ETA: 13s - loss: 1.3789 - acc: 0.4149
29/48 [=================>............] - ETA: 12s - loss: 1.3765 - acc: 0.4162
30/48 [=================>............] - ETA: 11s - loss: 1.3731 - acc: 0.4190
31/48 [==================>...........] - ETA: 11s - loss: 1.3722 - acc: 0.4189
32/48 [===================>..........] - ETA: 10s - loss: 1.3722 - acc: 0.4182
33/48 [===================>..........] - ETA: 9s - loss: 1.3710 - acc: 0.4183
34/48 [====================>.........] - ETA: 9s - loss: 1.3678 - acc: 0.4193
35/48 [====================>.........] - ETA: 8s - loss: 1.3677 - acc: 0.4187
36/48 [=====================>........] - ETA: 7s - loss: 1.3682 - acc: 0.4162
37/48 [======================>.......] - ETA: 7s - loss: 1.3686 - acc: 0.4164
38/48 [======================>.......] - ETA: 6s - loss: 1.3673 - acc: 0.4176
39/48 [=======================>......] - ETA: 5s - loss: 1.3690 - acc: 0.4149
40/48 [========================>.....] - ETA: 5s - loss: 1.3688 - acc: 0.4148
41/48 [========================>.....] - ETA: 4s - loss: 1.3675 - acc: 0.4133
42/48 [=========================>....] - ETA: 3s - loss: 1.3659 - acc: 0.4122
43/48 [=========================>....] - ETA: 3s - loss: 1.3652 - acc: 0.4130
44/48 [==========================>...] - ETA: 2s - loss: 1.3656 - acc: 0.4118
45/48 [===========================>..] - ETA: 1s - loss: 1.3653 - acc: 0.4128
46/48 [===========================>..] - ETA: 1s - loss: 1.3633 - acc: 0.4139
47/48 [============================>.] - ETA: 0s - loss: 1.3629 - acc: 0.4142
48/48 [==============================] - ETA: 0s - loss: 1.3624 - acc: 0.4143
48/48 [==============================] - 34s 699ms/step - loss: 1.3624 - acc: 0.4143 - val_loss: 1.2983 - val_acc: 0.4259
Epoch 2/5
1/48 [..............................] - ETA: 30s - loss: 1.2582 - acc: 0.5156
2/48 [>.............................] - ETA: 29s - loss: 1.3052 - acc: 0.4648
3/48 [>.............................] - ETA: 29s - loss: 1.3016 - acc: 0.4583
4/48 [=>............................] - ETA: 28s - loss: 1.3104 - acc: 0.4375
5/48 [==>...........................] - ETA: 28s - loss: 1.3193 - acc: 0.4203
6/48 [==>...........................] - ETA: 27s - loss: 1.3271 - acc: 0.4128
7/48 [===>..........................] - ETA: 27s - loss: 1.3226 - acc: 0.4096
8/48 [====>.........................] - ETA: 26s - loss: 1.3285 - acc: 0.4131
9/48 [====>.........................] - ETA: 26s - loss: 1.3263 - acc: 0.4123
10/48 [=====>........................] - ETA: 25s - loss: 1.3281 - acc: 0.4117
11/48 [=====>........................] - ETA: 25s - loss: 1.3266 - acc: 0.4148
12/48 [======>.......................] - ETA: 24s - loss: 1.3318 - acc: 0.4141
13/48 [=======>......................] - ETA: 23s - loss: 1.3321 - acc: 0.4129
14/48 [=======>......................] - ETA: 23s - loss: 1.3330 - acc: 0.4135
15/48 [========>.....................] - ETA: 22s - loss: 1.3307 - acc: 0.4151
16/48 [=========>....................] - ETA: 21s - loss: 1.3312 - acc: 0.4141
17/48 [=========>....................] - ETA: 21s - loss: 1.3285 - acc: 0.4168
18/48 [==========>...................] - ETA: 20s - loss: 1.3325 - acc: 0.4171
19/48 [==========>...................] - ETA: 19s - loss: 1.3340 - acc: 0.4149
20/48 [===========>..................] - ETA: 18s - loss: 1.3321 - acc: 0.4172
21/48 [============>.................] - ETA: 18s - loss: 1.3339 - acc: 0.4159
22/48 [============>.................] - ETA: 17s - loss: 1.3377 - acc: 0.4123
23/48 [=============>................] - ETA: 16s - loss: 1.3363 - acc: 0.4127
24/48 [==============>...............] - ETA: 16s - loss: 1.3379 - acc: 0.4105
25/48 [==============>...............] - ETA: 15s - loss: 1.3378 - acc: 0.4119
26/48 [===============>..............] - ETA: 14s - loss: 1.3377 - acc: 0.4117
27/48 [===============>..............] - ETA: 14s - loss: 1.3358 - acc: 0.4146
28/48 [================>.............] - ETA: 13s - loss: 1.3365 - acc: 0.4146
29/48 [=================>............] - ETA: 12s - loss: 1.3362 - acc: 0.4149
30/48 [=================>............] - ETA: 12s - loss: 1.3357 - acc: 0.4169
31/48 [==================>...........] - ETA: 11s - loss: 1.3354 - acc: 0.4166
32/48 [===================>..........] - ETA: 10s - loss: 1.3352 - acc: 0.4167
33/48 [===================>..........] - ETA: 10s - loss: 1.3346 - acc: 0.4181
34/48 [====================>.........] - ETA: 9s - loss: 1.3335 - acc: 0.4175
35/48 [====================>.........] - ETA: 8s - loss: 1.3326 - acc: 0.4163
36/48 [=====================>........] - ETA: 8s - loss: 1.3325 - acc: 0.4160
37/48 [======================>.......] - ETA: 7s - loss: 1.3333 - acc: 0.4166
38/48 [======================>.......] - ETA: 6s - loss: 1.3348 - acc: 0.4163
39/48 [=======================>......] - ETA: 6s - loss: 1.3349 - acc: 0.4153
40/48 [========================>.....] - ETA: 5s - loss: 1.3348 - acc: 0.4158
41/48 [========================>.....] - ETA: 4s - loss: 1.3350 - acc: 0.4169
42/48 [=========================>....] - ETA: 4s - loss: 1.3351 - acc: 0.4182
43/48 [=========================>....] - ETA: 3s - loss: 1.3353 - acc: 0.4184
44/48 [==========================>...] - ETA: 2s - loss: 1.3343 - acc: 0.4192
45/48 [===========================>..] - ETA: 2s - loss: 1.3345 - acc: 0.4191
46/48 [===========================>..] - ETA: 1s - loss: 1.3342 - acc: 0.4183
47/48 [============================>.] - ETA: 0s - loss: 1.3338 - acc: 0.4184
48/48 [==============================] - ETA: 0s - loss: 1.3338 - acc: 0.4181
48/48 [==============================] - 34s 715ms/step - loss: 1.3338 - acc: 0.4181 - val_loss: 1.2890 - val_acc: 0.4279
Epoch 3/5
1/48 [..............................] - ETA: 29s - loss: 1.3984 - acc: 0.3906
2/48 [>.............................] - ETA: 30s - loss: 1.3683 - acc: 0.3828
3/48 [>.............................] - ETA: 29s - loss: 1.3551 - acc: 0.3906
4/48 [=>............................] - ETA: 28s - loss: 1.3399 - acc: 0.3945
5/48 [==>...........................] - ETA: 28s - loss: 1.3558 - acc: 0.3859
6/48 [==>...........................] - ETA: 27s - loss: 1.3367 - acc: 0.3932
7/48 [===>..........................] - ETA: 26s - loss: 1.3451 - acc: 0.3962
8/48 [====>.........................] - ETA: 26s - loss: 1.3343 - acc: 0.4023
9/48 [====>.........................] - ETA: 25s - loss: 1.3343 - acc: 0.3993
10/48 [=====>........................] - ETA: 25s - loss: 1.3310 - acc: 0.4031
11/48 [=====>........................] - ETA: 24s - loss: 1.3362 - acc: 0.4013
12/48 [======>.......................] - ETA: 23s - loss: 1.3363 - acc: 0.4004
13/48 [=======>......................] - ETA: 23s - loss: 1.3414 - acc: 0.3948
14/48 [=======>......................] - ETA: 22s - loss: 1.3382 - acc: 0.3979
15/48 [========>.....................] - ETA: 21s - loss: 1.3417 - acc: 0.3943
16/48 [=========>....................] - ETA: 21s - loss: 1.3364 - acc: 0.4062
17/48 [=========>....................] - ETA: 20s - loss: 1.3313 - acc: 0.4072
18/48 [==========>...................] - ETA: 19s - loss: 1.3300 - acc: 0.4089
19/48 [==========>...................] - ETA: 19s - loss: 1.3271 - acc: 0.4116
20/48 [===========>..................] - ETA: 18s - loss: 1.3258 - acc: 0.4102
21/48 [============>.................] - ETA: 18s - loss: 1.3224 - acc: 0.4133
22/48 [============>.................] - ETA: 17s - loss: 1.3169 - acc: 0.4162
23/48 [=============>................] - ETA: 16s - loss: 1.3123 - acc: 0.4202
24/48 [==============>...............] - ETA: 16s - loss: 1.3090 - acc: 0.4202
25/48 [==============>...............] - ETA: 15s - loss: 1.3104 - acc: 0.4203
26/48 [===============>..............] - ETA: 14s - loss: 1.3101 - acc: 0.4210
27/48 [===============>..............] - ETA: 14s - loss: 1.3063 - acc: 0.4227
28/48 [================>.............] - ETA: 13s - loss: 1.3095 - acc: 0.4219
29/48 [=================>............] - ETA: 12s - loss: 1.3111 - acc: 0.4205
30/48 [=================>............] - ETA: 12s - loss: 1.3105 - acc: 0.4201
31/48 [==================>...........] - ETA: 11s - loss: 1.3113 - acc: 0.4196
32/48 [===================>..........] - ETA: 10s - loss: 1.3114 - acc: 0.4189
33/48 [===================>..........] - ETA: 10s - loss: 1.3118 - acc: 0.4212
34/48 [====================>.........] - ETA: 9s - loss: 1.3127 - acc: 0.4200
35/48 [====================>.........] - ETA: 8s - loss: 1.3129 - acc: 0.4183
36/48 [=====================>........] - ETA: 7s - loss: 1.3176 - acc: 0.4169
37/48 [======================>.......] - ETA: 7s - loss: 1.3185 - acc: 0.4181
38/48 [======================>.......] - ETA: 6s - loss: 1.3184 - acc: 0.4178
39/48 [=======================>......] - ETA: 6s - loss: 1.3153 - acc: 0.4201
40/48 [========================>.....] - ETA: 5s - loss: 1.3162 - acc: 0.4209
41/48 [========================>.....] - ETA: 4s - loss: 1.3166 - acc: 0.4207
42/48 [=========================>....] - ETA: 4s - loss: 1.3158 - acc: 0.4211
43/48 [=========================>....] - ETA: 3s - loss: 1.3151 - acc: 0.4213
44/48 [==========================>...] - ETA: 2s - loss: 1.3155 - acc: 0.4212
45/48 [===========================>..] - ETA: 2s - loss: 1.3139 - acc: 0.4217
46/48 [===========================>..] - ETA: 1s - loss: 1.3144 - acc: 0.4202
47/48 [============================>.] - ETA: 0s - loss: 1.3137 - acc: 0.4212
48/48 [==============================] - ETA: 0s - loss: 1.3115 - acc: 0.4224
48/48 [==============================] - 34s 707ms/step - loss: 1.3115 - acc: 0.4224 - val_loss: 1.3348 - val_acc: 0.4259
Epoch 4/5
1/48 [..............................] - ETA: 32s - loss: 1.3714 - acc: 0.3984
2/48 [>.............................] - ETA: 30s - loss: 1.3240 - acc: 0.4453
3/48 [>.............................] - ETA: 29s - loss: 1.3102 - acc: 0.4401
4/48 [=>............................] - ETA: 29s - loss: 1.2682 - acc: 0.4609
5/48 [==>...........................] - ETA: 28s - loss: 1.2781 - acc: 0.4469
6/48 [==>...........................] - ETA: 28s - loss: 1.2553 - acc: 0.4622
7/48 [===>..........................] - ETA: 27s - loss: 1.2440 - acc: 0.4654
8/48 [====>.........................] - ETA: 26s - loss: 1.2431 - acc: 0.4658
9/48 [====>.........................] - ETA: 26s - loss: 1.2454 - acc: 0.4618
10/48 [=====>........................] - ETA: 25s - loss: 1.2482 - acc: 0.4555
11/48 [=====>........................] - ETA: 25s - loss: 1.2449 - acc: 0.4638
12/48 [======>.......................] - ETA: 24s - loss: 1.2483 - acc: 0.4655
13/48 [=======>......................] - ETA: 23s - loss: 1.2510 - acc: 0.4627
14/48 [=======>......................] - ETA: 23s - loss: 1.2534 - acc: 0.4587
15/48 [========>.....................] - ETA: 22s - loss: 1.2538 - acc: 0.4573
16/48 [=========>....................] - ETA: 21s - loss: 1.2534 - acc: 0.4580
17/48 [=========>....................] - ETA: 21s - loss: 1.2468 - acc: 0.4605
18/48 [==========>...................] - ETA: 20s - loss: 1.2519 - acc: 0.4570
19/48 [==========>...................] - ETA: 19s - loss: 1.2537 - acc: 0.4576
20/48 [===========>..................] - ETA: 18s - loss: 1.2506 - acc: 0.4566
21/48 [============>.................] - ETA: 18s - loss: 1.2516 - acc: 0.4550
22/48 [============>.................] - ETA: 17s - loss: 1.2523 - acc: 0.4545
23/48 [=============>................] - ETA: 16s - loss: 1.2493 - acc: 0.4555
24/48 [==============>...............] - ETA: 16s - loss: 1.2485 - acc: 0.4564
25/48 [==============>...............] - ETA: 15s - loss: 1.2430 - acc: 0.4603
26/48 [===============>..............] - ETA: 14s - loss: 1.2474 - acc: 0.4600
27/48 [===============>..............] - ETA: 14s - loss: 1.2479 - acc: 0.4633
28/48 [================>.............] - ETA: 13s - loss: 1.2493 - acc: 0.4637
29/48 [=================>............] - ETA: 12s - loss: 1.2519 - acc: 0.4612
30/48 [=================>............] - ETA: 12s - loss: 1.2494 - acc: 0.4620
31/48 [==================>...........] - ETA: 11s - loss: 1.2493 - acc: 0.4627
32/48 [===================>..........] - ETA: 10s - loss: 1.2501 - acc: 0.4641
33/48 [===================>..........] - ETA: 10s - loss: 1.2491 - acc: 0.4645
34/48 [====================>.........] - ETA: 9s - loss: 1.2498 - acc: 0.4637
35/48 [====================>.........] - ETA: 8s - loss: 1.2492 - acc: 0.4654
36/48 [=====================>........] - ETA: 8s - loss: 1.2521 - acc: 0.4627
37/48 [======================>.......] - ETA: 7s - loss: 1.2541 - acc: 0.4603
38/48 [======================>.......] - ETA: 6s - loss: 1.2534 - acc: 0.4597
39/48 [=======================>......] - ETA: 6s - loss: 1.2551 - acc: 0.4593
40/48 [========================>.....] - ETA: 5s - loss: 1.2562 - acc: 0.4604
41/48 [========================>.....] - ETA: 4s - loss: 1.2542 - acc: 0.4613
42/48 [=========================>....] - ETA: 4s - loss: 1.2535 - acc: 0.4611
43/48 [=========================>....] - ETA: 3s - loss: 1.2534 - acc: 0.4608
44/48 [==========================>...] - ETA: 2s - loss: 1.2525 - acc: 0.4602
45/48 [===========================>..] - ETA: 2s - loss: 1.2517 - acc: 0.4604
46/48 [===========================>..] - ETA: 1s - loss: 1.2522 - acc: 0.4594
47/48 [============================>.] - ETA: 0s - loss: 1.2527 - acc: 0.4591
48/48 [==============================] - ETA: 0s - loss: 1.2528 - acc: 0.4583
48/48 [==============================] - 34s 710ms/step - loss: 1.2528 - acc: 0.4583 - val_loss: 1.2656 - val_acc: 0.4575
Epoch 5/5
1/48 [..............................] - ETA: 31s - loss: 1.1485 - acc: 0.5703
2/48 [>.............................] - ETA: 30s - loss: 1.1321 - acc: 0.5508
3/48 [>.............................] - ETA: 30s - loss: 1.1256 - acc: 0.5625
4/48 [=>............................] - ETA: 29s - loss: 1.1545 - acc: 0.5566
5/48 [==>...........................] - ETA: 28s - loss: 1.1522 - acc: 0.5500
6/48 [==>...........................] - ETA: 27s - loss: 1.1650 - acc: 0.5339
7/48 [===>..........................] - ETA: 27s - loss: 1.1728 - acc: 0.5246
8/48 [====>.........................] - ETA: 27s - loss: 1.1981 - acc: 0.5068
9/48 [====>.........................] - ETA: 26s - loss: 1.1924 - acc: 0.5095
10/48 [=====>........................] - ETA: 25s - loss: 1.1773 - acc: 0.5133
11/48 [=====>........................] - ETA: 24s - loss: 1.1619 - acc: 0.5213
12/48 [======>.......................] - ETA: 24s - loss: 1.1591 - acc: 0.5156
13/48 [=======>......................] - ETA: 23s - loss: 1.1585 - acc: 0.5126
14/48 [=======>......................] - ETA: 22s - loss: 1.1546 - acc: 0.5145
15/48 [========>.....................] - ETA: 22s - loss: 1.1461 - acc: 0.5156
16/48 [=========>....................] - ETA: 21s - loss: 1.1523 - acc: 0.5107
17/48 [=========>....................] - ETA: 20s - loss: 1.1557 - acc: 0.5074
18/48 [==========>...................] - ETA: 20s - loss: 1.1583 - acc: 0.5078
19/48 [==========>...................] - ETA: 19s - loss: 1.1594 - acc: 0.5029
20/48 [===========>..................] - ETA: 18s - loss: 1.1587 - acc: 0.5027
21/48 [============>.................] - ETA: 17s - loss: 1.1614 - acc: 0.4989
22/48 [============>.................] - ETA: 17s - loss: 1.1632 - acc: 0.4986
23/48 [=============>................] - ETA: 16s - loss: 1.1669 - acc: 0.4952
24/48 [==============>...............] - ETA: 15s - loss: 1.1688 - acc: 0.4958
25/48 [==============>...............] - ETA: 15s - loss: 1.1659 - acc: 0.4975
26/48 [===============>..............] - ETA: 14s - loss: 1.1642 - acc: 0.4976
27/48 [===============>..............] - ETA: 13s - loss: 1.1625 - acc: 0.4962
28/48 [================>.............] - ETA: 13s - loss: 1.1630 - acc: 0.4958
29/48 [=================>............] - ETA: 12s - loss: 1.1611 - acc: 0.4992
30/48 [=================>............] - ETA: 11s - loss: 1.1640 - acc: 0.4982
31/48 [==================>...........] - ETA: 11s - loss: 1.1632 - acc: 0.5000
32/48 [===================>..........] - ETA: 10s - loss: 1.1638 - acc: 0.4995
33/48 [===================>..........] - ETA: 9s - loss: 1.1643 - acc: 0.4983
34/48 [====================>.........] - ETA: 9s - loss: 1.1649 - acc: 0.4966
35/48 [====================>.........] - ETA: 8s - loss: 1.1655 - acc: 0.4969
36/48 [=====================>........] - ETA: 7s - loss: 1.1646 - acc: 0.4965
37/48 [======================>.......] - ETA: 7s - loss: 1.1632 - acc: 0.4968
38/48 [======================>.......] - ETA: 6s - loss: 1.1621 - acc: 0.4986
39/48 [=======================>......] - ETA: 5s - loss: 1.1631 - acc: 0.4982
40/48 [========================>.....] - ETA: 5s - loss: 1.1612 - acc: 0.4998
41/48 [========================>.....] - ETA: 4s - loss: 1.1601 - acc: 0.5011
42/48 [=========================>....] - ETA: 3s - loss: 1.1610 - acc: 0.5013
43/48 [=========================>....] - ETA: 3s - loss: 1.1621 - acc: 0.5004
44/48 [==========================>...] - ETA: 2s - loss: 1.1623 - acc: 0.4995
45/48 [===========================>..] - ETA: 1s - loss: 1.1649 - acc: 0.4974
46/48 [===========================>..] - ETA: 1s - loss: 1.1642 - acc: 0.4983
47/48 [============================>.] - ETA: 0s - loss: 1.1675 - acc: 0.4968
48/48 [==============================] - ETA: 0s - loss: 1.1677 - acc: 0.4970
48/48 [==============================] - 34s 704ms/step - loss: 1.1677 - acc: 0.4970 - val_loss: 1.2880 - val_acc: 0.4496
<keras.src.callbacks.History object at 0x0000000063F41220>
1/60 [..............................] - ETA: 3s - loss: 1.3318 - acc: 0.4688
3/60 [>.............................] - ETA: 2s - loss: 1.2109 - acc: 0.4583
4/60 [=>............................] - ETA: 2s - loss: 1.2095 - acc: 0.4766
6/60 [==>...........................] - ETA: 2s - loss: 1.2572 - acc: 0.4583
8/60 [===>..........................] - ETA: 2s - loss: 1.2674 - acc: 0.4492
10/60 [====>.........................] - ETA: 2s - loss: 1.2870 - acc: 0.4594
12/60 [=====>........................] - ETA: 2s - loss: 1.2890 - acc: 0.4688
14/60 [======>.......................] - ETA: 2s - loss: 1.2840 - acc: 0.4732
16/60 [=======>......................] - ETA: 1s - loss: 1.2636 - acc: 0.4863
18/60 [========>.....................] - ETA: 1s - loss: 1.2634 - acc: 0.4792
20/60 [=========>....................] - ETA: 1s - loss: 1.2602 - acc: 0.4844
21/60 [=========>....................] - ETA: 1s - loss: 1.2684 - acc: 0.4851
22/60 [==========>...................] - ETA: 1s - loss: 1.2726 - acc: 0.4830
24/60 [===========>..................] - ETA: 1s - loss: 1.2955 - acc: 0.4688
26/60 [============>.................] - ETA: 1s - loss: 1.2920 - acc: 0.4700
28/60 [=============>................] - ETA: 1s - loss: 1.2995 - acc: 0.4598
30/60 [==============>...............] - ETA: 1s - loss: 1.3098 - acc: 0.4583
32/60 [===============>..............] - ETA: 1s - loss: 1.3079 - acc: 0.4580
34/60 [================>.............] - ETA: 1s - loss: 1.2968 - acc: 0.4614
36/60 [=================>............] - ETA: 1s - loss: 1.3054 - acc: 0.4531
38/60 [==================>...........] - ETA: 1s - loss: 1.3054 - acc: 0.4507
39/60 [==================>...........] - ETA: 0s - loss: 1.2975 - acc: 0.4527
40/60 [===================>..........] - ETA: 0s - loss: 1.2963 - acc: 0.4523
42/60 [====================>.........] - ETA: 0s - loss: 1.2990 - acc: 0.4509
44/60 [=====================>........] - ETA: 0s - loss: 1.3005 - acc: 0.4489
46/60 [======================>.......] - ETA: 0s - loss: 1.3018 - acc: 0.4463
48/60 [=======================>......] - ETA: 0s - loss: 1.3012 - acc: 0.4440
49/60 [=======================>......] - ETA: 0s - loss: 1.3095 - acc: 0.4420
51/60 [========================>.....] - ETA: 0s - loss: 1.3120 - acc: 0.4406
53/60 [=========================>....] - ETA: 0s - loss: 1.3123 - acc: 0.4452
55/60 [==========================>...] - ETA: 0s - loss: 1.3122 - acc: 0.4437
57/60 [===========================>..] - ETA: 0s - loss: 1.3107 - acc: 0.4435
59/60 [============================>.] - ETA: 0s - loss: 1.3160 - acc: 0.4423
60/60 [==============================] - 3s 47ms/step - loss: 1.3147 - acc: 0.4429
Test accuracy with CNN: 0.44286465644836426
# LSTM classifier over the tokenised bug-report text: embedding -> LSTM -> softmax.
rnnmodel = Sequential()  # define model type
rnnmodel.add(Embedding(MAX_NUM_WORDS, 128))  # learn 128-d vectors for the vocab
rnnmodel.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
# one output unit per priority class; softmax gives class probabilities
rnnmodel.add(Dense(y_train.shape[1], activation='softmax'))
rnnmodel.compile(loss='categorical_crossentropy',
                 optimizer='rmsprop',
                 metrics=['acc'])  # define output
# NOTE(review): the fit() call had been mangled into the comment above in the
# original paste — restored as its own statement so the cell actually trains.
rnnmodel.fit(x_train, y_train,
             batch_size=128,
             epochs=1,  # whew these are slow
             validation_data=(x_val, y_val))
1/48 [..............................] - ETA: 6:50 - loss: 1.6107 - acc: 0.1953
2/48 [>.............................] - ETA: 7:30 - loss: 1.6018 - acc: 0.3125
3/48 [>.............................] - ETA: 8:10 - loss: 1.5934 - acc: 0.3490
4/48 [=>............................] - ETA: 8:14 - loss: 1.5844 - acc: 0.3711
5/48 [==>...........................] - ETA: 8:11 - loss: 1.5735 - acc: 0.3922
6/48 [==>...........................] - ETA: 8:06 - loss: 1.5512 - acc: 0.3971
7/48 [===>..........................] - ETA: 8:01 - loss: 1.5462 - acc: 0.4040
8/48 [====>.........................] - ETA: 7:55 - loss: 1.5293 - acc: 0.4111
9/48 [====>.........................] - ETA: 7:50 - loss: 1.5119 - acc: 0.4175
10/48 [=====>........................] - ETA: 7:44 - loss: 1.5045 - acc: 0.4094
11/48 [=====>........................] - ETA: 7:36 - loss: 1.4981 - acc: 0.4027
12/48 [======>.......................] - ETA: 7:27 - loss: 1.4918 - acc: 0.3945
13/48 [=======>......................] - ETA: 7:18 - loss: 1.4816 - acc: 0.3864
14/48 [=======>......................] - ETA: 7:09 - loss: 1.4701 - acc: 0.3862
15/48 [========>.....................] - ETA: 6:59 - loss: 1.4586 - acc: 0.3885
16/48 [=========>....................] - ETA: 6:50 - loss: 1.4511 - acc: 0.3857
17/48 [=========>....................] - ETA: 6:40 - loss: 1.4375 - acc: 0.3897
18/48 [==========>...................] - ETA: 6:30 - loss: 1.4407 - acc: 0.3832
19/48 [==========>...................] - ETA: 6:19 - loss: 1.4339 - acc: 0.3857
20/48 [===========>..................] - ETA: 6:08 - loss: 1.4298 - acc: 0.3891
21/48 [============>.................] - ETA: 5:56 - loss: 1.4299 - acc: 0.3880
22/48 [============>.................] - ETA: 5:44 - loss: 1.4231 - acc: 0.3913
23/48 [=============>................] - ETA: 5:33 - loss: 1.4199 - acc: 0.3957
24/48 [==============>...............] - ETA: 5:21 - loss: 1.4163 - acc: 0.3975
25/48 [==============>...............] - ETA: 5:09 - loss: 1.4139 - acc: 0.3966
26/48 [===============>..............] - ETA: 4:58 - loss: 1.4103 - acc: 0.3975
27/48 [===============>..............] - ETA: 4:45 - loss: 1.4100 - acc: 0.3973
28/48 [================>.............] - ETA: 4:33 - loss: 1.4098 - acc: 0.3948
29/48 [=================>............] - ETA: 4:20 - loss: 1.4067 - acc: 0.3952
30/48 [=================>............] - ETA: 4:07 - loss: 1.4052 - acc: 0.3984
31/48 [==================>...........] - ETA: 3:54 - loss: 1.4027 - acc: 0.3972
32/48 [===================>..........] - ETA: 3:41 - loss: 1.3987 - acc: 0.4011
33/48 [===================>..........] - ETA: 3:28 - loss: 1.3953 - acc: 0.4032
34/48 [====================>.........] - ETA: 3:15 - loss: 1.3923 - acc: 0.4037
35/48 [====================>.........] - ETA: 3:02 - loss: 1.3914 - acc: 0.4025
36/48 [=====================>........] - ETA: 2:48 - loss: 1.3905 - acc: 0.4041
37/48 [======================>.......] - ETA: 2:35 - loss: 1.3869 - acc: 0.4079
38/48 [======================>.......] - ETA: 2:21 - loss: 1.3855 - acc: 0.4093
39/48 [=======================>......] - ETA: 2:08 - loss: 1.3850 - acc: 0.4093
40/48 [========================>.....] - ETA: 1:54 - loss: 1.3822 - acc: 0.4104
41/48 [========================>.....] - ETA: 1:40 - loss: 1.3821 - acc: 0.4103
42/48 [=========================>....] - ETA: 1:26 - loss: 1.3829 - acc: 0.4085
43/48 [=========================>....] - ETA: 1:12 - loss: 1.3800 - acc: 0.4086
44/48 [==========================>...] - ETA: 57s - loss: 1.3787 - acc: 0.4096
45/48 [===========================>..] - ETA: 43s - loss: 1.3763 - acc: 0.4122
46/48 [===========================>..] - ETA: 29s - loss: 1.3782 - acc: 0.4117
47/48 [============================>.] - ETA: 14s - loss: 1.3768 - acc: 0.4116
48/48 [==============================] - ETA: 0s - loss: 1.3771 - acc: 0.4117
48/48 [==============================] - 713s 15s/step - loss: 1.3771 - acc: 0.4117 - val_loss: 1.3053 - val_acc: 0.4286
<keras.src.callbacks.History object at 0x000000001200A580>
1/15 [=>............................] - ETA: 24s - loss: 1.2630 - acc: 0.4609
2/15 [===>..........................] - ETA: 21s - loss: 1.2761 - acc: 0.4688
3/15 [=====>........................] - ETA: 19s - loss: 1.2766 - acc: 0.4453
4/15 [=======>......................] - ETA: 18s - loss: 1.2697 - acc: 0.4551
5/15 [=========>....................] - ETA: 16s - loss: 1.2800 - acc: 0.4484
6/15 [===========>..................] - ETA: 15s - loss: 1.3160 - acc: 0.4375
7/15 [=============>................] - ETA: 13s - loss: 1.3138 - acc: 0.4263
8/15 [===============>..............] - ETA: 11s - loss: 1.3198 - acc: 0.4229
9/15 [=================>............] - ETA: 10s - loss: 1.3192 - acc: 0.4219
10/15 [===================>..........] - ETA: 8s - loss: 1.3141 - acc: 0.4234
11/15 [=====================>........] - ETA: 6s - loss: 1.3151 - acc: 0.4219
12/15 [=======================>......] - ETA: 5s - loss: 1.3147 - acc: 0.4219
13/15 [=========================>....] - ETA: 3s - loss: 1.3175 - acc: 0.4225
14/15 [===========================>..] - ETA: 1s - loss: 1.3238 - acc: 0.4202
15/15 [==============================] - ETA: 0s - loss: 1.3255 - acc: 0.4218
15/15 [==============================] - 25s 2s/step - loss: 1.3255 - acc: 0.4218
Test accuracy with RNN: 0.4218009412288666
# lime is a package that helps us explain models.
# eli5 can help explain the whole model.
# High iteration cap so the solver converges on the sparse tf-idf features.
logreg = LogisticRegression(max_iter=10000)
logreg.fit(tv_train, Y_train)  # new model on smaller data
LogisticRegression(max_iter=10000)
precision recall f1-score support
P1 0.46 0.14 0.22 220
P2 0.41 0.41 0.41 602
P3 0.51 0.73 0.60 800
P4 0.61 0.26 0.36 227
P5 0.00 0.00 0.00 50
accuracy 0.48 1899
macro avg 0.40 0.31 0.32 1899
weighted avg 0.47 0.48 0.45 1899
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\KJordan\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
# Collect per-class predicted probabilities for the test set so we can
# inspect where the classifier is right vs. wrong.
probs = pd.DataFrame(logreg.predict_proba(tv_test),
                     columns=["p1", "p2", "p3", "p4", "p5"])
# reset_index(drop=True) so positional rows line up with predict_proba's output order
probs['answer'] = Y_test.reset_index(drop=True)
probs['text'] = X_test.reset_index(drop=True)
probs['predicted_cat'] = y_log  # presumably logreg's label predictions — TODO confirm where y_log is set
# keep only the correctly classified rows
right_answers = probs[probs.answer == probs.predicted_cat]
## show DF
print(right_answers)
2 0.447333 ... P1
3 0.041254 ... P3
4 0.041799 ... P3
6 0.507083 ... P1
9 0.044860 ... P3
... ... ... ...
1892 0.064510 ... P3
1894 0.047989 ... P3
1895 0.011649 ... P3
1897 0.080342 ... P3
1898 0.109943 ... P2
[921 rows x 8 columns]
(5, 16547)
(16547,)
# Pair each learned per-class coefficient with its vocabulary term.
# logreg.coef_ is (n_classes, n_features); transpose to one row per feature.
DF_coef = pd.DataFrame(logreg.coef_.T, columns=["p1", "p2", "p3", "p4", "p5"])
# get_feature_names_out() is already 1-D, so the original trailing .T was a no-op — dropped
DF_coef['words'] = tfidf.get_feature_names_out()
## show DF
print(DF_coef)
0 -0.192871 -0.062680 0.046857 0.242046 -0.033353 aa
1 -0.114039 -0.155780 -0.031039 0.333057 -0.032199 aaa
2 0.058467 -0.017540 -0.030493 -0.008625 -0.001810 aaaaaaargh
3 -0.003839 -0.012005 -0.013376 0.030560 -0.001340 aaah
4 -0.003839 -0.012005 -0.013376 0.030560 -0.001340 aaargh
... ... ... ... ... ... ...
16542 -0.120513 0.522092 -0.225924 -0.142327 -0.033327 zzz
16543 -0.058731 0.311237 -0.184166 -0.057910 -0.010430 zzzz
16544 -0.008182 -0.017672 0.038075 -0.008492 -0.003729 子供容認
16545 -0.003798 -0.018088 0.030160 -0.006590 -0.001684 输入
16546 -0.003798 -0.018088 0.030160 -0.006590 -0.001684 输出
[16547 rows x 6 columns]
from lime.lime_text import LimeTextExplainer

# class_names sorted so their order matches the model's class index order
explainer = LimeTextExplainer(class_names=Y_train.sort_values().unique())
# hand-picked correctly classified documents to explain
id_values = [1, 14, 435, 550, 755]
for id_value in id_values:
    print('Document id: %d' % id_value)
    print('Predicted class =', right_answers.iloc[id_value]["predicted_cat"])
    print('True class: %s' % right_answers.iloc[id_value]["answer"])
    print(right_answers.iloc[id_value]['text'])
Predicted class = P3
True class: P3
optimize imports introduces unused import inner classes subclassed example class a class ax import class b class bx extends ax obviously import unneccessary import a suffice optimize imports introduces correctly marked warning afterwards might relate
Document id: 14
Predicted class = P3
True class: P3
search references show subclassed methods consider public class bar protected void dosomething public class foo extends bar protected void dosomething select dosomething search references none found i expected dosomething considered reference this problem i use search reference figure i delete code example class foo public void dosomething method empty i delete protected void eclipse already aware particular problem try change method signature change dosomething private preview eclipse warns behaviour program might change
Document id: 435
Predicted class = P3
True class: P3
rfe open file class typing name in project many nested directories packages often frustrating click navigate particular file want it even distracting dont remember exact package java class lives eclipse needs way open file class name pressing control key bring small dialog box containing text field as type file name class name text field see list displaying current project match typed the must work navigate list hitting enter open selected idea feature i find immensely useful note java classes files files contain java classes idea solves two separate functions one open java class name another open file name its possible single function merges sets overriding filename class foo map file would provide better ui also first pass implementation without would welcome thats easier implement
Document id: 550
Predicted class = P2
True class: P2
creating junit testcase gives error whenever i upgrade eclipse i always rename old version install new directory called eclipse copy previous workspace everything works fine except i try create junit testcase the testcase created wizard puts error dialog title new saying creation element failed reason testfileutilities exist i currently got build also happened i upgraded the contents log session dec microsystems bootloader constants arguments file d entry dec message error stack java model exception java model status testfileutilities exist arkers native method entry dec message testfileutilities exist
Document id: 755
Predicted class = P3
True class: P3
changing jre source attachment triggers build when source attachment archive jres libaries changed build path build triggered however changing source attachment trigger build
import eli5

# Render the 10 most influential tf-idf features per priority class.
eli5.show_weights(estimator=logreg, top=10,
                  feature_names=tfidf.get_feature_names_out())
|
|
|
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||